1. Explore Data

Import required libraries

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import itertools
import datetime
import matplotlib.cm as cm
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
%matplotlib inline

# Silence FutureWarning messages raised from this point on.
# NOTE(review): the filter is installed after the plotting libraries above are imported,
# so FutureWarnings emitted while importing them can still be shown — confirm the order.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd

# Echo the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
C:\Users\shiva\Anaconda3\lib\site-packages\statsmodels\tools\_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm

Load the data

In [3]:
# Read the workbook into a DataFrame.
df = pd.read_excel('Homeless in U.S.updated.xlsx')

# Scale both percentage columns by 100 in one vectorised step.
pct_cols = ['Percentage 1', 'Percentage 2 - USICH']
df[pct_cols] = df[pct_cols] * 100

# Discard the 'Unnamed: 5' column and preview the first rows.
df = df.drop(columns=['Unnamed: 5'])
df.head()
Out[3]:
State Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults
0 California 151278.0 39512223.0 0.382864 26.73 7044.0 10980.0 11993.0
1 New York 92091.0 19453561.0 0.473389 16.27 15091.0 1270.0 2978.0
2 Florida 28328.0 21477737.0 0.131895 5.01 2171.0 2472.0 1450.0
3 Texas 25848.0 28995881.0 0.089144 4.57 1919.0 1806.0 1355.0
4 Washington 21577.0 7614893.0 0.283353 3.81 1751.0 1585.0 1911.0
In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
Out[4]:
Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults
count 51.000000 5.100000e+01 51.000000 51.000000 50.000000 50.000000 50.000000
mean 11023.215686 6.437266e+06 0.154667 1.948627 1031.980000 733.160000 693.920000
std 24161.640344 7.360740e+06 0.147404 4.268965 2324.411073 1561.141084 1723.176372
min 548.000000 5.787590e+05 0.039783 0.100000 37.000000 49.000000 26.000000
25% 2315.000000 1.789606e+06 0.085467 0.410000 187.500000 178.500000 147.250000
50% 4538.000000 4.467673e+06 0.099773 0.800000 309.000000 403.500000 258.000000
75% 9466.500000 7.446805e+06 0.148006 1.675000 948.500000 687.250000 576.500000
max 151278.000000 3.951222e+07 0.923983 26.730000 15091.000000 10980.000000 11993.000000

Process data

In [5]:
# Keep the first 51 rows, then remove the row whose State is 'D.C.'.
df = df.iloc[:51].loc[lambda frame: frame['State'] != 'D.C.']

# Column dtypes and non-null counts of the filtered frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 50 entries, 0 to 50
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   State                 50 non-null     object 
 1   Total Homeless        50 non-null     float64
 2   Population            50 non-null     float64
 3   Percentage 1          50 non-null     float64
 4   Percentage 2 - USICH  50 non-null     float64
 5   Homeless Households   50 non-null     float64
 6   Veterans              50 non-null     float64
 7   Young Adults          50 non-null     float64
dtypes: float64(7), object(1)
memory usage: 3.5+ KB
In [6]:
# Display the filtered frame in full.
df
Out[6]:
State Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults
0 California 151278.0 39512223.0 0.382864 26.73 7044.0 10980.0 11993.0
1 New York 92091.0 19453561.0 0.473389 16.27 15091.0 1270.0 2978.0
2 Florida 28328.0 21477737.0 0.131895 5.01 2171.0 2472.0 1450.0
3 Texas 25848.0 28995881.0 0.089144 4.57 1919.0 1806.0 1355.0
4 Washington 21577.0 7614893.0 0.283353 3.81 1751.0 1585.0 1911.0
5 Massachusetts 18471.0 6949503.0 0.265789 3.26 3766.0 917.0 480.0
6 Oregon 15876.0 4217737.0 0.376410 2.81 1147.0 1438.0 1590.0
7 Pennsylvania 13199.0 12801989.0 0.103101 2.33 1569.0 857.0 737.0
8 Georgia 10443.0 10617423.0 0.098357 1.85 815.0 801.0 596.0
9 Ohio 10345.0 11689100.0 0.088501 1.83 999.0 676.0 643.0
10 Illinois 10199.0 12671821.0 0.080486 1.80 1105.0 690.0 609.0
11 Arizona 10007.0 7278717.0 0.137483 1.77 745.0 910.0 587.0
12 Colorado 9619.0 5758736.0 0.167033 1.70 719.0 1068.0 545.0
13 North Carolina 9314.0 10488084.0 0.088806 1.65 767.0 907.0 417.0
14 New Jersey 8862.0 8882190.0 0.099773 1.57 993.0 551.0 496.0
15 Michigan 8575.0 9986857.0 0.085863 1.52 1022.0 599.0 489.0
16 Minnesota 7977.0 5639632.0 0.141445 1.41 1028.0 297.0 685.0
17 Tennessee 7467.0 6833174.0 0.109276 1.32 558.0 679.0 366.0
18 Nevada 7169.0 3080156.0 0.232748 1.27 183.0 674.0 1285.0
19 Maryland 6561.0 6045680.0 0.108524 1.16 603.0 490.0 290.0
21 Hawaii 6412.0 1415872.0 0.452866 1.13 539.0 505.0 222.0
22 Missouri 6179.0 6137428.0 0.100677 1.09 707.0 488.0 477.0
23 Virginia 5783.0 8535519.0 0.067752 1.02 652.0 447.0 258.0
24 Indiana 5471.0 6732219.0 0.081266 0.97 544.0 572.0 258.0
25 Wisconsin 4538.0 5822434.0 0.077940 0.80 592.0 359.0 200.0
26 South Carolina 4172.0 5148714.0 0.081030 0.74 275.0 462.0 216.0
27 Kentucky 4079.0 4467673.0 0.091300 0.72 313.0 447.0 211.0
28 Oklahoma 3944.0 3956971.0 0.099672 0.70 300.0 280.0 322.0
29 Alabama 3261.0 4903185.0 0.066508 0.58 236.0 292.0 320.0
30 New Mexico 3241.0 2096829.0 0.154567 0.57 246.0 257.0 216.0
31 Connecticut 3033.0 3565287.0 0.085070 0.54 305.0 195.0 196.0
32 Louisiana 2941.0 4648794.0 0.063264 0.52 170.0 360.0 188.0
33 Utah 2798.0 3205958.0 0.087275 0.49 260.0 211.0 163.0
34 Arkansas 2717.0 3017825.0 0.090032 0.48 132.0 238.0 336.0
35 Nebraska 2365.0 1934408.0 0.122260 0.42 201.0 175.0 151.0
36 Kansas 2381.0 2913314.0 0.081728 0.42 209.0 189.0 122.0
37 Idaho 2315.0 1787065.0 0.129542 0.41 243.0 201.0 188.0
38 Iowa 2315.0 3155070.0 0.073374 0.41 227.0 149.0 146.0
39 Maine 2106.0 1344212.0 0.156672 0.37 283.0 116.0 125.0
40 Alaska 1907.0 731545.0 0.260681 0.34 161.0 111.0 176.0
41 New Hampshire 1396.0 1359711.0 0.102669 0.25 206.0 113.0 84.0
42 West Virginia 1397.0 1792147.0 0.077951 0.25 79.0 137.0 89.0
43 Montana 1357.0 1068778.0 0.126967 0.24 136.0 205.0 82.0
44 Mississippi 1184.0 2976149.0 0.039783 0.21 69.0 72.0 56.0
45 Vermont 1089.0 623989.0 0.174522 0.19 128.0 87.0 105.0
46 Rhode Island 1055.0 1059361.0 0.099588 0.19 111.0 92.0 26.0
47 South Dakota 995.0 884659.0 0.112473 0.18 76.0 66.0 66.0
48 Delaware 921.0 973764.0 0.094581 0.16 116.0 65.0 42.0
49 Wyoming 548.0 578759.0 0.094685 0.10 37.0 51.0 81.0
50 North Dakota 557.0 762062.0 0.073091 0.10 51.0 49.0 72.0
In [7]:
# Requires the third-party `squarify` package (install with: %pip install squarify).
import squarify

# Full state/territory name -> two-letter postal code.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# Work on a copy so the frame used by the earlier plots stays untouched.
df_copy = df.copy()
df_copy['Statecode'] = df_copy['State'].map(us_state_abbrev)

# Series.map yields NaN for names missing from the dict; fail loudly instead of
# carrying blank codes into the plot labels further down.
unmapped_states = df_copy.loc[df_copy['Statecode'].isna(), 'State'].tolist()
assert not unmapped_states, f"No postal code found for: {unmapped_states}"
In [8]:
# Preview the copy to check the newly added 'Statecode' column.
df_copy.head()
Out[8]:
State Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults Statecode
0 California 151278.0 39512223.0 0.382864 26.73 7044.0 10980.0 11993.0 CA
1 New York 92091.0 19453561.0 0.473389 16.27 15091.0 1270.0 2978.0 NY
2 Florida 28328.0 21477737.0 0.131895 5.01 2171.0 2472.0 1450.0 FL
3 Texas 25848.0 28995881.0 0.089144 4.57 1919.0 1806.0 1355.0 TX
4 Washington 21577.0 7614893.0 0.283353 3.81 1751.0 1585.0 1911.0 WA

EDA

In [9]:
## Global plotting/display configuration.
## Order matters: later calls override overlapping settings made by earlier ones.
# Initial seaborn theme.
# NOTE(review): the second sns.set(...) below sets context/style again, so this call
# looks redundant — confirm before removing.
sns.set(context = 'talk', style = 'darkgrid', font_scale = 1)
# Apply matplotlib's 'fivethirtyeight' style sheet.
plt.style.use('fivethirtyeight')
# Allow up to 500 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)
# Draw minus signs as ASCII hyphens instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

# Font-size and line-width overrides handed to seaborn below.
rc={'font.size': 16, 'axes.labelsize': 17, 'legend.fontsize': 16,\
    'axes.titlesize': 16, 'xtick.labelsize': 16, 'ytick.labelsize': 17, "lines.linewidth": 2.5}

# Final seaborn theme: 'poster' context, 'darkgrid' style, 'muted' palette, plus `rc`.
sns.set(context = 'poster', style = 'darkgrid', font_scale = 1, palette = 'muted', rc = rc)
# Sans-serif font preference list: 'simhei' first, then 'Arial'.
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})

Barplot

Total Homeless by each State (Percentage)

In [10]:
# Horizontal bars of 'Total Homeless' per state (largest at the top); each bar is
# annotated with that state's share, in percent, of the summed counts.
ax = df.sort_values('Total Homeless', ascending=True)[['State', 'Total Homeless']]\
        .plot.barh(legend=False, stacked=True, title='Total % Homeless by State', x='State', figsize=(17, 15))
ax.set_alpha(0.8)
ax.set_xlabel('Total Homeless')

# Sum of all bar lengths = total homeless count over the plotted states.
total = sum(bar.get_width() for bar in ax.patches)

for bar in ax.patches:
    share_pct = round((bar.get_width() / total) * 100, 2)
    # get_width() is the bar's right end (x position); get_y() is its lower edge.
    ax.text(bar.get_width() + .4, bar.get_y(), str(share_pct) + '%',
            fontsize=15, color='dimgrey')

plt.show();

Total Homeless by each State (Counts)

In [11]:
# Horizontal bars of 'Total Homeless' per state (largest at the top); each bar is
# annotated with its integer count.
ax = df.sort_values('Total Homeless', ascending=True)[['State', 'Total Homeless']]\
        .plot.barh(legend=False, stacked=True, title='Total Homeless by State', x='State',
                   cmap=plt.get_cmap('Spectral'), figsize=(17, 15))
ax.set_alpha(0.8)
ax.set_xlabel('Total Homeless')

for bar in ax.patches:
    # get_width() is the bar's right end (x position); get_y() is its lower edge.
    ax.text(bar.get_width() + .4, bar.get_y(), str(int(bar.get_width())),
            fontsize=15, color='dimgrey')

plt.show();

Lollipop Plot

Percentage of Homeless to the Total Population

In [12]:
# Rows ordered from the lowest to the highest 'Percentage 1', renumbered from 0.
dfpct = (
    df.sort_values('Percentage 1')
      .reset_index(drop=True)
)
dfpct
Out[12]:
State Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults
0 Mississippi 1184.0 2976149.0 0.039783 0.21 69.0 72.0 56.0
1 Louisiana 2941.0 4648794.0 0.063264 0.52 170.0 360.0 188.0
2 Alabama 3261.0 4903185.0 0.066508 0.58 236.0 292.0 320.0
3 Virginia 5783.0 8535519.0 0.067752 1.02 652.0 447.0 258.0
4 North Dakota 557.0 762062.0 0.073091 0.10 51.0 49.0 72.0
5 Iowa 2315.0 3155070.0 0.073374 0.41 227.0 149.0 146.0
6 Wisconsin 4538.0 5822434.0 0.077940 0.80 592.0 359.0 200.0
7 West Virginia 1397.0 1792147.0 0.077951 0.25 79.0 137.0 89.0
8 Illinois 10199.0 12671821.0 0.080486 1.80 1105.0 690.0 609.0
9 South Carolina 4172.0 5148714.0 0.081030 0.74 275.0 462.0 216.0
10 Indiana 5471.0 6732219.0 0.081266 0.97 544.0 572.0 258.0
11 Kansas 2381.0 2913314.0 0.081728 0.42 209.0 189.0 122.0
12 Connecticut 3033.0 3565287.0 0.085070 0.54 305.0 195.0 196.0
13 Michigan 8575.0 9986857.0 0.085863 1.52 1022.0 599.0 489.0
14 Utah 2798.0 3205958.0 0.087275 0.49 260.0 211.0 163.0
15 Ohio 10345.0 11689100.0 0.088501 1.83 999.0 676.0 643.0
16 North Carolina 9314.0 10488084.0 0.088806 1.65 767.0 907.0 417.0
17 Texas 25848.0 28995881.0 0.089144 4.57 1919.0 1806.0 1355.0
18 Arkansas 2717.0 3017825.0 0.090032 0.48 132.0 238.0 336.0
19 Kentucky 4079.0 4467673.0 0.091300 0.72 313.0 447.0 211.0
20 Delaware 921.0 973764.0 0.094581 0.16 116.0 65.0 42.0
21 Wyoming 548.0 578759.0 0.094685 0.10 37.0 51.0 81.0
22 Georgia 10443.0 10617423.0 0.098357 1.85 815.0 801.0 596.0
23 Rhode Island 1055.0 1059361.0 0.099588 0.19 111.0 92.0 26.0
24 Oklahoma 3944.0 3956971.0 0.099672 0.70 300.0 280.0 322.0
25 New Jersey 8862.0 8882190.0 0.099773 1.57 993.0 551.0 496.0
26 Missouri 6179.0 6137428.0 0.100677 1.09 707.0 488.0 477.0
27 New Hampshire 1396.0 1359711.0 0.102669 0.25 206.0 113.0 84.0
28 Pennsylvania 13199.0 12801989.0 0.103101 2.33 1569.0 857.0 737.0
29 Maryland 6561.0 6045680.0 0.108524 1.16 603.0 490.0 290.0
30 Tennessee 7467.0 6833174.0 0.109276 1.32 558.0 679.0 366.0
31 South Dakota 995.0 884659.0 0.112473 0.18 76.0 66.0 66.0
32 Nebraska 2365.0 1934408.0 0.122260 0.42 201.0 175.0 151.0
33 Montana 1357.0 1068778.0 0.126967 0.24 136.0 205.0 82.0
34 Idaho 2315.0 1787065.0 0.129542 0.41 243.0 201.0 188.0
35 Florida 28328.0 21477737.0 0.131895 5.01 2171.0 2472.0 1450.0
36 Arizona 10007.0 7278717.0 0.137483 1.77 745.0 910.0 587.0
37 Minnesota 7977.0 5639632.0 0.141445 1.41 1028.0 297.0 685.0
38 New Mexico 3241.0 2096829.0 0.154567 0.57 246.0 257.0 216.0
39 Maine 2106.0 1344212.0 0.156672 0.37 283.0 116.0 125.0
40 Colorado 9619.0 5758736.0 0.167033 1.70 719.0 1068.0 545.0
41 Vermont 1089.0 623989.0 0.174522 0.19 128.0 87.0 105.0
42 Nevada 7169.0 3080156.0 0.232748 1.27 183.0 674.0 1285.0
43 Alaska 1907.0 731545.0 0.260681 0.34 161.0 111.0 176.0
44 Massachusetts 18471.0 6949503.0 0.265789 3.26 3766.0 917.0 480.0
45 Washington 21577.0 7614893.0 0.283353 3.81 1751.0 1585.0 1911.0
46 Oregon 15876.0 4217737.0 0.376410 2.81 1147.0 1438.0 1590.0
47 California 151278.0 39512223.0 0.382864 26.73 7044.0 10980.0 11993.0
48 Hawaii 6412.0 1415872.0 0.452866 1.13 539.0 505.0 222.0
49 New York 92091.0 19453561.0 0.473389 16.27 15091.0 1270.0 2978.0
In [17]:
# Lollipop chart: one horizontal stem per state running from 0 to its
# 'Percentage 1' value, capped by a red marker.
fig, ax = plt.subplots(figsize=(16, 13))

ax.hlines(y=dfpct['State'], xmin=0, xmax=dfpct['Percentage 1'])
ax.plot(dfpct['Percentage 1'], dfpct['State'], "o", color='red')

# The stems start at 0, so the axis starts at 0 as well; otherwise the smallest
# state has no visible stem. 5% headroom keeps the largest marker off the edge.
ax.set_xlim(0, dfpct['Percentage 1'].max() * 1.05)
ax.set_xlabel('Homeless as % of state population')

# Decorate
plt.title('Percentage of Homeless to the Total Population', fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)

plt.show();
In [61]:
# Font sizes and default figure size for the figures that follow.
params = {
    'legend.fontsize': '20',
    'figure.figsize': (15, 5),
    'axes.labelsize': '18',
    'axes.titlesize': '30',
    'xtick.labelsize': '16',
    'ytick.labelsize': '16',
}
plt.rcParams.update(params)

# Colours for general text, tick labels and axis labels.
plt.rcParams.update({
    'text.color': '#A04000',
    'xtick.color': '#800000',
    'ytick.color': '#808000',
    'axes.labelcolor': '#283747',
})
In [62]:
# Show floats with two decimals in every DataFrame displayed from here on.
pd.options.display.float_format = '{:.2f}'.format

# Vertical bars of 'Percentage 1' per state, highest first.
# NOTE(review): integer inputs index the colormap's lookup table directly, so 0..n-1
# covers only the low end of 'Spectral'; use np.linspace(0, 1, n) if a full-range
# gradient was intended.
ax = df.sort_values('Percentage 1', ascending=False)[['State', 'Percentage 1']]\
        .plot.bar(legend=False, stacked=True, title='Percentage of Homeless to the Total Population by State',
                  x='State', color=[plt.cm.Spectral(np.arange(len(df.State)))], figsize=(17, 8), width=1)
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')
# Lay out after the labels exist so they are included in the fit.
plt.tight_layout()

# Highlight the two highest-ranked bars in black ...
for pos in (0, 1):
    ax.patches[pos].set_facecolor('#000000')

# ... and every bar from position 28 to the end in yellow.
for pos in range(28, len(ax.patches)):
    ax.patches[pos].set_facecolor('#fff200')

plt.show();
In [63]:
# Vertical bars of 'Percentage 2 - USICH' per state, highest first.
# NOTE(review): the values look like each state's share of the summed 'Total Homeless'
# column (e.g. California 26.73); title and y label assume that — confirm.
ax = df.sort_values('Percentage 2 - USICH', ascending=False)[['State', 'Percentage 2 - USICH']]\
        .plot.bar(legend=False, stacked=True, title='Share of Total U.S. Homeless by State (USICH)',
                  x='State', color=[plt.cm.Spectral(np.arange(len(df.State)))], figsize=(17, 8), width=1)
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Total Homeless')

# Highlight the two highest-ranked bars in black ...
for pos in (0, 1):
    ax.patches[pos].set_facecolor('#000000')

# ... and every bar from position 28 to the end in yellow.
for pos in range(28, len(ax.patches)):
    ax.patches[pos].set_facecolor('#fff200')

plt.show();
In [80]:
# With Log axis
df_pct_grp = df.set_index('State');
df_pct_grp[['Percentage 2 - USICH', 'Percentage 1']].plot(kind='bar', figsize = (17,7), width = 1, logy = True, stacked=False);

plt.title('Percentage of Homeless Households by Percentages')
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

plt.show();
In [79]:
# Without Log axis
df_pct_grp = df.set_index('State');
df_pct_grp[['Percentage 2 - USICH', 'Percentage 1']].plot(kind='bar', figsize = (17,7), width = 1, logy = False, stacked=False);

plt.title('Percentage of Homeless Households by Percentages')
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

plt.show();
In [82]:
# With Log axis and Stacked
df_pct_grp = df.set_index('State');
df_pct_grp[['Percentage 2 - USICH', 'Percentage 1']].plot(kind='bar', figsize = (17,7), width = 1, logy = True, stacked=True);

plt.title('Percentage of Homeless Households by Percentages')
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

plt.show();
In [81]:
# Without Log axis and Stacked
df_pct_grp = df.set_index('State');
df_pct_grp[['Percentage 2 - USICH', 'Percentage 1']].plot(kind='bar', figsize = (17,7), width = 1, logy = False, stacked=True);

plt.title('Percentage of Homeless Households by Percentages')
plt.xticks(rotation=90)
plt.xlabel('State')
plt.ylabel('Percentage of Homeless by Population')

plt.show();

Treemap

Treemap of Total Homeless

In [86]:
# Font sizes and default figure size for the figures that follow.
params = {
    'legend.fontsize': '20',
    'figure.figsize': (15, 5),
    'axes.labelsize': '18',
    'axes.titlesize': '30',
    'xtick.labelsize': '16',
    'ytick.labelsize': '16',
}
plt.rcParams.update(params)

# Colours for general text, tick labels and axis labels.
plt.rcParams.update({
    'text.color': '#000000',
    'xtick.color': '#800000',
    'ytick.color': '#808000',
    'axes.labelcolor': '#283747',
})
In [87]:
# Treemap of 'Total Homeless': one tile per state, sized by its count.
# `df` is deliberately not rebound here, so the earlier cells can still be re-run.
labels = df_copy['Statecode']
sizes = df_copy['Total Homeless']

# Build "<code>\n<count in thousands>k" labels by position. Adding two Series would
# align them on their index labels, and df_copy's index has a gap where the D.C. row
# was removed, so a freshly indexed Series would shift every label after the gap.
final_labels = [
    f"{code}\n{round(count / 1000, 2)}k"
    for code, count in zip(labels, sizes)
]

# One colour per tile, evenly spaced along the 'Spectral' colormap.
colors = [plt.cm.Spectral(i / float(len(labels))) for i in range(len(labels))]

# Draw Plot
plt.figure(figsize=(15, 9), dpi=300)
squarify.plot(sizes=sizes, label=final_labels, color=colors,
              bar_kwargs={'alpha': .9}, text_kwargs={'fontsize': 11})

# Decorate
plt.title('Treemap of Total Homeless')
plt.axis('off')
plt.show();

Stacked Bars

Total Homeless by Groups - Veterans/Young Adults

In [88]:
# Display the working copy in full, including the 'Statecode' column.
df_copy
Out[88]:
State Total Homeless Population Percentage 1 Percentage 2 - USICH Homeless Households Veterans Young Adults Statecode
0 California 151278.00 39512223.00 0.38 26.73 7044.00 10980.00 11993.00 CA
1 New York 92091.00 19453561.00 0.47 16.27 15091.00 1270.00 2978.00 NY
2 Florida 28328.00 21477737.00 0.13 5.01 2171.00 2472.00 1450.00 FL
3 Texas 25848.00 28995881.00 0.09 4.57 1919.00 1806.00 1355.00 TX
4 Washington 21577.00 7614893.00 0.28 3.81 1751.00 1585.00 1911.00 WA
5 Massachusetts 18471.00 6949503.00 0.27 3.26 3766.00 917.00 480.00 MA
6 Oregon 15876.00 4217737.00 0.38 2.81 1147.00 1438.00 1590.00 OR
7 Pennsylvania 13199.00 12801989.00 0.10 2.33 1569.00 857.00 737.00 PA
8 Georgia 10443.00 10617423.00 0.10 1.85 815.00 801.00 596.00 GA
9 Ohio 10345.00 11689100.00 0.09 1.83 999.00 676.00 643.00 OH
10 Illinois 10199.00 12671821.00 0.08 1.80 1105.00 690.00 609.00 IL
11 Arizona 10007.00 7278717.00 0.14 1.77 745.00 910.00 587.00 AZ
12 Colorado 9619.00 5758736.00 0.17 1.70 719.00 1068.00 545.00 CO
13 North Carolina 9314.00 10488084.00 0.09 1.65 767.00 907.00 417.00 NC
14 New Jersey 8862.00 8882190.00 0.10 1.57 993.00 551.00 496.00 NJ
15 Michigan 8575.00 9986857.00 0.09 1.52 1022.00 599.00 489.00 MI
16 Minnesota 7977.00 5639632.00 0.14 1.41 1028.00 297.00 685.00 MN
17 Tennessee 7467.00 6833174.00 0.11 1.32 558.00 679.00 366.00 TN
18 Nevada 7169.00 3080156.00 0.23 1.27 183.00 674.00 1285.00 NV
19 Maryland 6561.00 6045680.00 0.11 1.16 603.00 490.00 290.00 MD
21 Hawaii 6412.00 1415872.00 0.45 1.13 539.00 505.00 222.00 HI
22 Missouri 6179.00 6137428.00 0.10 1.09 707.00 488.00 477.00 MO
23 Virginia 5783.00 8535519.00 0.07 1.02 652.00 447.00 258.00 VA
24 Indiana 5471.00 6732219.00 0.08 0.97 544.00 572.00 258.00 IN
25 Wisconsin 4538.00 5822434.00 0.08 0.80 592.00 359.00 200.00 WI
26 South Carolina 4172.00 5148714.00 0.08 0.74 275.00 462.00 216.00 SC
27 Kentucky 4079.00 4467673.00 0.09 0.72 313.00 447.00 211.00 KY
28 Oklahoma 3944.00 3956971.00 0.10 0.70 300.00 280.00 322.00 OK
29 Alabama 3261.00 4903185.00 0.07 0.58 236.00 292.00 320.00 AL
30 New Mexico 3241.00 2096829.00 0.15 0.57 246.00 257.00 216.00 NM
31 Connecticut 3033.00 3565287.00 0.09 0.54 305.00 195.00 196.00 CT
32 Louisiana 2941.00 4648794.00 0.06 0.52 170.00 360.00 188.00 LA
33 Utah 2798.00 3205958.00 0.09 0.49 260.00 211.00 163.00 UT
34 Arkansas 2717.00 3017825.00 0.09 0.48 132.00 238.00 336.00 AR
35 Nebraska 2365.00 1934408.00 0.12 0.42 201.00 175.00 151.00 NE
36 Kansas 2381.00 2913314.00 0.08 0.42 209.00 189.00 122.00 KS
37 Idaho 2315.00 1787065.00 0.13 0.41 243.00 201.00 188.00 ID
38 Iowa 2315.00 3155070.00 0.07 0.41 227.00 149.00 146.00 IA
39 Maine 2106.00 1344212.00 0.16 0.37 283.00 116.00 125.00 ME
40 Alaska 1907.00 731545.00 0.26 0.34 161.00 111.00 176.00 AK
41 New Hampshire 1396.00 1359711.00 0.10 0.25 206.00 113.00 84.00 NH
42 West Virginia 1397.00 1792147.00 0.08 0.25 79.00 137.00 89.00 WV
43 Montana 1357.00 1068778.00 0.13 0.24 136.00 205.00 82.00 MT
44 Mississippi 1184.00 2976149.00 0.04 0.21 69.00 72.00 56.00 MS
45 Vermont 1089.00 623989.00 0.17 0.19 128.00 87.00 105.00 VT
46 Rhode Island 1055.00 1059361.00 0.10 0.19 111.00 92.00 26.00 RI
47 South Dakota 995.00 884659.00 0.11 0.18 76.00 66.00 66.00 SD
48 Delaware 921.00 973764.00 0.09 0.16 116.00 65.00 42.00 DE
49 Wyoming 548.00 578759.00 0.09 0.10 37.00 51.00 81.00 WY
50 North Dakota 557.00 762062.00 0.07 0.10 51.00 49.00 72.00 ND
In [91]:
# Side-by-side bars of the 'Veterans' and 'Young Adults' columns per state on a
# logarithmic y-axis.
df_copy_grp = df_copy.set_index('State')
df_copy_grp[['Veterans', 'Young Adults']].plot(kind='bar', figsize=(17, 9), stacked=False, logy=True, width=0.75)

# Title and axis labels so the figure can be read on its own.
plt.title('Homeless Veterans and Young Adults by State')
plt.xlabel('State')
plt.ylabel('Count (log scale)')

plt.show();

Scatterplot

Population versus Total Homeless by State

In [92]:
# Log-log scatter of 'Population' (y) against 'Total Homeless' (x); marker area
# follows 'Total Homeless' and every point is annotated with its state code.
plt.figure(figsize=(17, 10), dpi=300)
plt.style.use('fivethirtyeight')

# One explicit RGBA colour per row. Sizing by len(df_copy) keeps the colour array in
# step with the data; no `cmap` is passed because explicit colours bypass colormapping.
colors = cm.rainbow(np.linspace(0, 1, len(df_copy)))
plt.scatter(df_copy['Total Homeless'], df_copy['Population'],
            s=df_copy['Total Homeless'], c=colors, alpha=0.1)
plt.ylabel('Population')
plt.xlabel('Total Homeless')
plt.title('Population versus Total Homeless')

for code, homeless, population in zip(df_copy['Statecode'], df_copy['Total Homeless'], df_copy['Population']):
    # Label placed up and to the left of the point, joined to it by an arrow.
    plt.annotate(
        code,
        xy=(homeless, population), xytext=(-20, 20),
        textcoords='offset points', ha='right', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.2),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))

plt.yscale('log')
plt.xscale('log')
plt.show();

Dotplot

Homeless Households by each State

In [93]:
# Horizontal bars of 'Homeless Households' per state, sorted ascending so the
# smallest value is drawn at the bottom.
households_by_state = df_copy[['State', 'Homeless Households']].sort_values(by='Homeless Households')
households_by_state.plot.barh(
    x='State',
    title='Homeless Households by State',
    color='orange',
    legend=False,
    stacked=True,
    figsize=(17, 14),
);
plt.xticks(rotation=60)
plt.show();

Diverging Bars

If you want to see the variation based on a single metric and visualize the order and amount of this variance, diverging bars are a great tool. They help to quickly differentiate the performance of items (for example, State in this case), are quite intuitive, and instantly convey the point.

In [94]:
# Diverging bars of 'Homeless Households', standardised as (value - mean) / std.
households = df_copy['Homeless Households']
df_copy['mean Homeless Households'] = (households - households.mean()) / households.std()

# Above-average states in red, the rest in green — the same convention the
# 'Total Homeless' diverging chart uses.
df_copy['colors'] = np.where(df_copy['mean Homeless Households'] > 0, 'red', 'green')

# Order the rows by the standardised value so the bars form a monotone profile.
df_copy = df_copy.sort_values('mean Homeless Households').reset_index(drop=True)

# Draw plot
plt.figure(figsize=(14, 10), dpi=300)
plt.hlines(y=df_copy.index, xmin=0, xmax=df_copy['mean Homeless Households'],
           color=df_copy.colors, alpha=0.4, linewidth=5)

# Decorations
# Plain-text labels: inside $...$ matplotlib's mathtext ignores spaces, which would
# render the label as "HomelessHouseholds".
plt.gca().set(ylabel='State', xlabel='Homeless Households (standard deviations from the mean)')
plt.yticks(df_copy.index, df_copy.State, fontsize=12)
plt.title('Homeless Households normalised to the average Values', fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)
plt.show();
In [95]:
# Diverging bars of 'Total Homeless', standardised as (value - mean) / std.
total_homeless = df_copy['Total Homeless']
df_copy['mean Total Homeless'] = (total_homeless - total_homeless.mean()) / total_homeless.std()

# Above-average states in red, the rest in green.
df_copy['colors'] = np.where(df_copy['mean Total Homeless'] > 0, 'red', 'green')

# Order the rows by the standardised value so the bars form a monotone profile.
df_copy = df_copy.sort_values('mean Total Homeless').reset_index(drop=True)

# Draw plot
plt.figure(figsize=(14, 10), dpi=300)
plt.hlines(y=df_copy.index, xmin=0, xmax=df_copy['mean Total Homeless'],
           color=df_copy.colors, alpha=0.4, linewidth=5)

# Decorations
# Plain-text labels: inside $...$ matplotlib's mathtext ignores spaces, which would
# render the label as "TotalHomeless".
plt.gca().set(ylabel='State', xlabel='Total Homeless (standard deviations from the mean)')
plt.yticks(df_copy.index, df_copy.State, fontsize=12)
plt.title('Total Homeless normalised to the average Values', fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)
plt.show();
In [ ]:
 
In [ ]: